u16 maximum value is 0x10000 - 1 == (0x100 - 1) * (0x100 + 1)
u8 maximum value is 0x100 - 1
thus the direct conversion requires multiplication/division by
0x100 + 1 (i.e. 257)
division by 0x100 + 1 is implemented using the Maclaurin expansion
of (1 + x)^-1 truncated after the first-order term:
(1 + x)^-1 ~= 1 - x
a / 257 == a * (1 + 1/256)^-1 * (256^-1) ~= (a - a / 256) / 256
with each integer division rounded to the nearest integer.
while (n--)
{
- (*(unsigned char *) dst) = (*(unsigned short *) src) >> 8;
+#define div_257(a) ((((a)+128)-(((a)+128)>>8))>>8)
+ (*(unsigned char *) dst) = div_257 (*(unsigned short *) src);
dst += 1;
src += 2;
}
while (n--)
{
- (*(unsigned short *) dst) = (*(unsigned char *) src) << 8;
+ (*(unsigned short *) dst) = ((*(unsigned char *) src) << 8) | *src;
dst += 2;
src += 1;
}
while (n--)
{
- (*(unsigned char *) dst) = (*(unsigned short *) src) >> 8;
+#define div_257(a) ((((a)+128)-(((a)+128)>>8))>>8)
+ (*(unsigned char *) dst) = div_257 (*(unsigned short *) src);
dst += 1;
src += 2;
}
while (n--)
{
- (*(unsigned short *) dst) = (*(unsigned char *) src) << 8;
+ (*(unsigned short *) dst) = ((*(unsigned char *) src) << 8) | *src;
dst += 2;
src += 1;
}